This document describes how to go from raw vcftools output of diversity metrics (Fst, pi, and Tajima’s D) to Manhattan plots, including making figures for the manuscript associated with this repo. This script is a result of brute forcing things to work: I am certainly no expert, and thus lots of the notes refer to my own dumb mistakes.
library(tidyverse)
library(qqman)
library(scales)
First, read in raw output files from vcftools (in this repo, see filter-scan.sh for code to generate).
fst.UKUS.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseud_nostep_UKUS_50kb.windowed.weir.fst",sep="\t"))
fst.AUUK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseud_nostep_AUUK_50kb.windowed.weir.fst",sep="\t"))
fst.USAU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseud_nostep_AUUS_50kb.windowed.weir.fst",sep="\t"))
pi.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_UK_pi_50kb.windowed.pi",sep="\t"))
pi.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_AU_pi_50kb.windowed.pi",sep="\t"))
pi.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_US_pi_50kb.windowed.pi",sep="\t"))
TajD.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_UK_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_AU_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/EUSTreseq_pseudochrom_US_TajimaD_50kb.Tajima.D",sep="\t"))
vcftools outputs a few identifies for position: “CHROM,” “BIN_START” and “BIN_END” for .fst and .pi files, but only “CHROM” AND “BIN_START” for .Tajima.D files. Unfortunately, the numbering is off, so we’ll add 1 to every “BIN_START” in the .Tajima.D files.
head(TajD.AU.50kb)
## # A tibble: 6 x 4
## CHROM BIN_START N_SNPS TajimaD
## <fct> <int> <int> <dbl>
## 1 10 0 147 0.583
## 2 10 50000 334 0.372
## 3 10 100000 301 0.836
## 4 10 150000 98 1.56
## 5 10 200000 102 -0.557
## 6 10 250000 139 -0.0356
head(fst.AUUK.50kb)
## # A tibble: 6 x 6
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST
## <fct> <int> <int> <int> <dbl> <dbl>
## 1 10 1 50000 164 0.00792 0.0105
## 2 10 50001 100000 379 0.0595 0.0500
## 3 10 100001 150000 311 0.00728 -0.000459
## 4 10 150001 200000 120 0.0162 0.0275
## 5 10 200001 250000 121 0.126 0.0926
## 6 10 250001 300000 155 0.109 0.0747
TajD.UK.50kb$BIN_START <- TajD.UK.50kb$BIN_START + 1
TajD.AU.50kb$BIN_START <- TajD.AU.50kb$BIN_START + 1
TajD.US.50kb$BIN_START <- TajD.US.50kb$BIN_START + 1
Now BIN_START should match. To use dplyr, we’ll need a column that specifes a unique position in the genome. Create a new column that joins “CHROM” and “BIN_START” so that we match each value based on actual position in the genome.
fst.UKUS.50kb$POS_ID <- paste(fst.UKUS.50kb$CHROM,fst.UKUS.50kb$BIN_START,sep="-")
fst.AUUK.50kb$POS_ID <- paste(fst.AUUK.50kb$CHROM,fst.AUUK.50kb$BIN_START,sep="-")
fst.USAU.50kb$POS_ID <- paste(fst.USAU.50kb$CHROM,fst.USAU.50kb$BIN_START,sep="-")
pi.UK.50kb$POS_ID <- paste(pi.UK.50kb$CHROM,pi.UK.50kb$BIN_START,sep="-")
pi.AU.50kb$POS_ID <- paste(pi.AU.50kb$CHROM,pi.AU.50kb$BIN_START,sep="-")
pi.US.50kb$POS_ID <- paste(pi.US.50kb$CHROM,pi.US.50kb$BIN_START,sep="-")
TajD.UK.50kb$POS_ID <- paste(TajD.UK.50kb$CHROM,TajD.UK.50kb$BIN_START,sep="-")
TajD.AU.50kb$POS_ID <- paste(TajD.AU.50kb$CHROM,TajD.AU.50kb$BIN_START,sep="-")
TajD.US.50kb$POS_ID <- paste(TajD.US.50kb$CHROM,TajD.US.50kb$BIN_START,sep="-")
Now drop column names that we no longer need, otherwise we’ll have a giant table after all the merging.
fst.AUUK.50kb <- fst.AUUK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
fst.USAU.50kb <- fst.USAU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.UK.50kb <- pi.UK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.AU.50kb <- pi.AU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.US.50kb <- pi.US.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
TajD.UK.50kb <- TajD.UK.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.AU.50kb <- TajD.AU.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.US.50kb <- TajD.US.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
We’ll join tables based on the unique position identified (POS_ID). The rename() command ensures that each new column header specifies population (new name first). Heads up: each time you join two tables, those two tables are no longer accessible except as a joined table!
fstUKUS.fstAUUK.50kb <- left_join(fst.UKUS.50kb,fst.AUUK.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UKUS","_AUUK"))
fst.50kb <- left_join(fstUKUS.fstAUUK.50kb,fst.USAU.50kb, by = "POS_ID", copy = FALSE, suffix=c("","_USAU"))
fst.50kb <- rename(fst.50kb, WEIGHTED_FST_USAU = WEIGHTED_FST)
fst.50kb <- rename(fst.50kb, MEAN_FST_USAU = MEAN_FST)
fst.50kb.piUK <- left_join(fst.50kb,pi.UK.50kb, by = "POS_ID", copy = FALSE)
fst.50kb.piUK.piUS <- left_join(fst.50kb.piUK,pi.US.50kb, by = "POS_ID", copy = FALSE,suffix=c("_UK","_US"))
fst.pi.50kb <- left_join(fst.50kb.piUK.piUS,pi.AU.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb <- rename(fst.pi.50kb, PI_AU = PI)
fst.pi.50kb.TajDUK <- left_join(fst.pi.50kb,TajD.UK.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb.TajDUK.TajDUS <- left_join(fst.pi.50kb.TajDUK,TajD.US.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UK","_US"))
div <- left_join(fst.pi.50kb.TajDUK.TajDUS,TajD.AU.50kb, by = "POS_ID", copy = FALSE)
div <- rename(div, TajimaD_AU = TajimaD)
We’re going to drop data from: * small scaffolds (which conveniently start with ‘KQ’ or ‘LNCF’, * any rows (positions) with missing data (NA), * and also coerce “negative” FST or pi to zero. We replace POS_ID (which was converted to numeric) with SNP. For the qqman package, chromosomes need to be a numeric value, so we also use lapply() below to rename chromosomes.
div <- filter(div, !grepl('KQ',CHROM))
div <- filter(div, !grepl('LNCF',CHROM))
div <- filter(div, !grepl('Unknown',CHROM))
div <- div %>% drop_na()
div[,c(5:6)][div[,c(5:6)] < 0] <- 0
div[,c(8:14)][div[,c(8:14)] < 0] <- 0
div <- data.frame(lapply(div, function(x) {gsub("1A", "1.25", x)}))
div <- data.frame(lapply(div, function(x) {gsub("1B", "1.75", x)}))
div <- data.frame(lapply(div, function(x) {gsub("4A", "4.5", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LG5", "28", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LGE22", "29", x)}))
div <- data.frame(lapply(div, function(x) {gsub("Z", "0", x)}))
indx <- sapply(div, is.factor)
div[indx] <- lapply(div[indx], function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
div <- div %>% select(-POS_ID)
div$SNP <- seq.int(nrow(div))
str(div)
## 'data.frame': 20071 obs. of 17 variables:
## $ CHROM : num 10 10 10 10 10 10 10 10 10 10 ...
## $ BIN_START : num 1 50001 100001 150001 200001 ...
## $ BIN_END : num 50000 100000 150000 200000 250000 300000 350000 400000 450000 500000 ...
## $ N_VARIANTS : num 165 377 311 120 121 156 80 101 24 18 ...
## $ WEIGHTED_FST_UKUS: num 0.01745 0.03751 0 0 0.00586 ...
## $ MEAN_FST_UKUS : num 0.0133 0.0272 0 0 0 ...
## $ WEIGHTED_FST_AUUK: num 0.00792 0.05948 0.00728 0.01624 0.12564 ...
## $ MEAN_FST_AUUK : num 0.0105 0.05 0 0.0275 0.0926 ...
## $ WEIGHTED_FST_USAU: num 0.01291 0.00258 0 0 0.00315 ...
## $ MEAN_FST_USAU : num 0.0093 0.00585 0 0 0 ...
## $ PI_UK : num 0.001053 0.002189 0.002212 0.000915 0.000879 ...
## $ PI_US : num 0.001063 0.002373 0.002003 0.000829 0.0007 ...
## $ PI_AU : num 0.001024 0.002211 0.0022 0.000804 0.000532 ...
## $ TajimaD_UK : num 0.808 0.293 0.877 1.426 0.968 ...
## $ TajimaD_US : num 0.42348 0.58012 0.80239 1.05163 -0.00424 ...
## $ TajimaD_AU : num 0.583 0.372 0.836 1.557 -0.557 ...
## $ SNP : int 1 2 3 4 5 6 7 8 9 10 ...
Now we’re ready to plot and calculate genome-wide values!
First, we look at the distribution of variation across the genome.
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.UKUS.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.AUUK.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.USAU.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
quantile(div$WEIGHTED_FST_AUUK, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0676462 0.0862444 0.1490122 0.3077638
quantile(div$WEIGHTED_FST_UKUS, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0441469 0.0609766 0.1156152 0.2228447
mean(div$WEIGHTED_FST_AUUK) + 5*sd(div$WEIGHTED_FST_AUUK)
## [1] 0.1936444
mean(div$WEIGHTED_FST_UKUS) + 5*sd(div$WEIGHTED_FST_UKUS)
## [1] 0.1406712
div.outliers.AUUK <- div[which(div$WEIGHTED_FST_AUUK > quantile(div$WEIGHTED_FST_AUUK,.99)),]
div.outliers.USUK <- div[which(div$WEIGHTED_FST_UKUS > quantile(div$WEIGHTED_FST_UKUS,.99)),]
div.hifst.AUUK <- div[which(div$WEIGHTED_FST_AUUK > 0.1),]
div.hifst.UKUS <- div[which(div$WEIGHTED_FST_UKUS > 0.1),]
unique(div.outliers.USUK$CHROM)
## [1] 11.00 12.00 13.00 17.00 19.00 1.25 1.00 28.00 2.00 3.00 4.50 4.00
## [13] 6.00 29.00 0.00
length(div.outliers.USUK$SNP)
## [1] 201
write.csv(div.outliers.USUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.USUK.csv")
unique(div.outliers.AUUK$CHROM)
## [1] 10.00 12.00 13.00 17.00 18.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50
## [13] 4.00 5.00 6.00 7.00 8.00 0.00
length(div.outliers.AUUK$SNP)
## [1] 201
write.csv(div.outliers.AUUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.AUUK.csv")
What’s going on w/ other metrics at these outliers?
summary(div.outliers.AUUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0000730 0.0001890 0.0009734 0.0014188 0.0057224
summary(div.outliers.AUUK$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.383e-05 7.067e-05 2.285e-04 9.324e-04 1.447e-03 5.313e-03
summary(div.outliers.AUUK$TajimaD_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.3050 0.2574 0.7501 0.6479 1.2528 2.6922
summary(div.outliers.AUUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.2937 0.1481 0.6493 0.5926 1.0098 2.3588
summary(div.outliers.USUK$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.167e-06 6.250e-05 2.890e-04 8.204e-04 1.281e-03 5.202e-03
summary(div.outliers.USUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000210 0.0000765 0.0002912 0.0008650 0.0012335 0.0058409
summary(div.outliers.USUK$TajimaD_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1488 -0.4076 0.4343 0.3082 0.9422 2.9728
summary(div.outliers.USUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9252 0.3047 0.8963 0.8645 1.4612 2.5895
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
descdist(div.outliers.AUUK$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.00572238
## median: 0.000189
## mean: 0.0009734123
## estimated sd: 0.001348425
## estimated skewness: 1.597318
## estimated kurtosis: 4.653
descdist(div.outliers.AUUK$PI_AU)
## summary statistics
## ------
## min: 1.38333e-05 max: 0.00531275
## median: 0.0002285
## mean: 0.0009324382
## estimated sd: 0.001252321
## estimated skewness: 1.532558
## estimated kurtosis: 4.458218
descdist(div.outliers.USUK$PI_UK)
## summary statistics
## ------
## min: 2.1e-05 max: 0.00584092
## median: 0.000291167
## mean: 0.000865046
## estimated sd: 0.001150262
## estimated skewness: 1.693987
## estimated kurtosis: 5.349374
descdist(div.outliers.USUK$PI_US)
## summary statistics
## ------
## min: 7.16667e-06 max: 0.00520175
## median: 0.000289007
## mean: 0.0008203641
## estimated sd: 0.001078227
## estimated skewness: 1.65203
## estimated kurtosis: 5.131592
#beta.outliers.AUUK <- fitdist(div.outliers.AUUK$PI_AU, "beta")
#summary(beta.outliers.AUUK)
#beta.outliers.USUK <- fitdist(div.outliers.USUK$PI_US, "beta")
#summary(beta.outliers.USUK)
div.outliers.AUUK.lowFstUSAU <- div.outliers.AUUK[which(div.outliers.AUUK$WEIGHTED_FST_USAU < 0.01 ),]
div.outliers.AUUK.lowFstUSAU
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 7 10.00 300001 350000 80 0.1027220 0.0600280
## 9 10.00 400001 450000 24 0.0578802 0.0408439
## 10 10.00 450001 500000 18 0.0641170 0.0438801
## 903 12.00 3500001 3550000 313 0.1841560 0.1390400
## 1414 13.00 8300001 8350000 6 0.0888562 0.0764819
## 2251 17.00 2350001 2400000 90 0.0718635 0.0453814
## 3399 1.25 25150001 25200000 80 0.1291380 0.1210420
## 3400 1.25 25200001 25250000 65 0.0763648 0.0755731
## 3793 1.25 44850001 44900000 7 0.1502550 0.1512780
## 3946 1.25 52500001 52550000 9 0.1002160 0.0841356
## 13219 4.50 6100001 6150000 132 0.1381170 0.1304820
## 13220 4.50 6150001 6200000 84 0.1382960 0.1325750
## 16265 6.00 5500001 5550000 61 0.2868590 0.2827380
## 16267 6.00 5600001 5650000 66 0.2488270 0.2538260
## 16268 6.00 5650001 5700000 90 0.2626990 0.2607650
## 16271 6.00 5800001 5850000 95 0.1569920 0.1609480
## 19739 0.00 51900001 51950000 60 0.1874130 0.1492570
## 19962 0.00 63050001 63100000 232 0.1055780 0.0686902
## 19999 0.00 64900001 64950000 186 0.1765470 0.1102810
## 20000 0.00 64950001 65000000 240 0.2637760 0.1947360
## 20002 0.00 65050001 65100000 450 0.1136620 0.0975800
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 7 0.190274 0.122576 0.000000000 0.000000000
## 9 0.150496 0.109492 0.000000000 0.000000000
## 10 0.197232 0.155583 0.000000000 0.000000000
## 903 0.172857 0.132176 0.000000000 0.000000000
## 1414 0.165927 0.125393 0.004134130 0.019587600
## 2251 0.179935 0.149568 0.000000000 0.000475237
## 3399 0.187165 0.173330 0.000000000 0.000000000
## 3400 0.179424 0.168180 0.000000000 0.000000000
## 3793 0.192503 0.166546 0.008088430 0.000000000
## 3946 0.157895 0.133751 0.000000000 0.000000000
## 13219 0.149469 0.141418 0.000000000 0.000000000
## 13220 0.164826 0.153364 0.000000000 0.000000000
## 16265 0.178806 0.178051 0.000000000 0.000000000
## 16267 0.155095 0.162605 0.000000000 0.000000000
## 16268 0.172276 0.167504 0.000000000 0.010494800
## 16271 0.155570 0.151752 0.004985280 0.006458370
## 19739 0.158887 0.126400 0.006943530 0.004539620
## 19962 0.159585 0.111195 0.000000000 0.003472230
## 19999 0.250744 0.187151 0.004209500 0.002449910
## 20000 0.230248 0.162655 0.000000000 0.000000000
## 20002 0.171135 0.133933 0.000369775 0.000000000
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 7 5.80833e-04 3.92847e-04 2.90669e-04 1.010540 -0.569214 -1.333590
## 9 1.45667e-04 1.08668e-04 7.26667e-05 0.674272 -0.938436 -1.463810
## 10 1.15667e-04 7.03333e-05 3.06667e-05 0.510990 -1.402140 -0.520315
## 903 1.58583e-03 2.29027e-03 2.38120e-03 0.322469 1.214250 1.230810
## 1414 3.53333e-05 4.86667e-05 5.13333e-05 -0.078611 2.026320 1.435800
## 2251 5.73500e-04 5.66689e-04 5.08838e-04 0.877600 0.367470 0.645267
## 3399 6.12500e-04 8.16000e-04 7.46855e-04 1.160520 2.972800 2.474060
## 3400 4.95167e-04 6.38006e-04 5.69339e-04 1.386520 2.737810 2.391710
## 3793 6.46667e-05 4.11669e-05 4.38335e-05 1.877030 0.118964 0.239407
## 3946 7.31667e-05 6.43336e-05 5.60000e-05 1.281030 0.765878 0.581803
## 13219 1.16983e-03 4.09190e-04 3.88846e-04 2.126910 -2.148800 -2.179840
## 13220 7.55674e-04 2.66003e-04 2.38502e-04 2.154800 -2.022640 -2.304990
## 16265 2.88333e-05 5.33346e-04 4.75333e-04 1.723710 2.056500 1.339670
## 16267 5.08333e-05 5.92009e-04 5.25841e-04 0.721746 2.161640 1.510490
## 16268 6.93333e-05 7.88034e-04 6.67837e-04 -1.293670 2.136270 1.375800
## 16271 1.58333e-04 6.64365e-04 7.56515e-04 -0.174387 1.129640 1.427160
## 19739 3.98671e-04 3.79014e-04 4.32010e-04 1.206870 0.930709 1.050540
## 19962 1.55317e-03 1.37795e-03 1.14304e-03 0.640391 0.573849 0.685643
## 19999 8.16667e-04 1.26139e-03 1.36611e-03 -0.243400 0.635922 1.156450
## 20000 1.24650e-03 1.72167e-03 1.74439e-03 -0.207223 1.104400 1.037880
## 20002 2.43118e-03 3.10507e-03 2.86397e-03 0.924093 0.736868 0.386692
## SNP
## 7 7
## 9 9
## 10 10
## 903 903
## 1414 1414
## 2251 2251
## 3399 3399
## 3400 3400
## 3793 3793
## 3946 3946
## 13219 13219
## 13220 13220
## 16265 16265
## 16267 16267
## 16268 16268
## 16271 16271
## 19739 19739
## 19962 19962
## 19999 19999
## 20000 20000
## 20002 20002
length(div.outliers.AUUK.lowFstUSAU$SNP)
## [1] 21
div.outliers.USUK.lowFstUSAU <- div.outliers.USUK[which(div.outliers.USUK$WEIGHTED_FST_USAU < 0.01 ),]
div.outliers.USUK.lowFstUSAU
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 903 12.00 3500001 3550000 313 0.184156 0.1390400
## 3399 1.25 25150001 25200000 80 0.129138 0.1210420
## 3772 1.25 43800001 43850000 9 0.233478 0.2201170
## 3773 1.25 43850001 43900000 9 0.164466 0.1563530
## 3774 1.25 43900001 43950000 8 0.222953 0.2224730
## 3781 1.25 44250001 44300000 8 0.198336 0.1773510
## 3788 1.25 44600001 44650000 8 0.183673 0.1641710
## 3790 1.25 44700001 44750000 9 0.198634 0.1724300
## 3791 1.25 44750001 44800000 4 0.126050 0.1050630
## 3792 1.25 44800001 44850000 5 0.158329 0.1520360
## 3793 1.25 44850001 44900000 7 0.150255 0.1512780
## 3826 1.25 46500001 46550000 16 0.119742 0.1104310
## 3941 1.25 52250001 52300000 6 0.144998 0.1072330
## 3950 1.25 52700001 52750000 3 0.122153 0.0888192
## 6514 1.00 106850001 106900000 40 0.132203 0.1245470
## 13217 4.50 6000001 6050000 87 0.127251 0.1166620
## 13218 4.50 6050001 6100000 122 0.124097 0.1208020
## 13219 4.50 6100001 6150000 132 0.138117 0.1304820
## 13220 4.50 6150001 6200000 84 0.138296 0.1325750
## 14057 4.00 27600001 27650000 43 0.124484 0.1000850
## 14058 4.00 27650001 27700000 15 0.124772 0.1110270
## 14243 4.00 36900001 36950000 120 0.144461 0.1311450
## 16262 6.00 5350001 5400000 23 0.152651 0.1426320
## 16265 6.00 5500001 5550000 61 0.286859 0.2827380
## 16266 6.00 5550001 5600000 76 0.236749 0.2467770
## 16267 6.00 5600001 5650000 66 0.248827 0.2538260
## 16268 6.00 5650001 5700000 90 0.262699 0.2607650
## 16269 6.00 5700001 5750000 118 0.200669 0.1998990
## 16270 6.00 5750001 5800000 84 0.189015 0.1918170
## 16271 6.00 5800001 5850000 95 0.156992 0.1609480
## 19081 0.00 18950001 19000000 222 0.189965 0.1459400
## 19082 0.00 19000001 19050000 193 0.145196 0.1232680
## 19739 0.00 51900001 51950000 60 0.187413 0.1492570
## 19800 0.00 54950001 55000000 332 0.138968 0.1067000
## 19932 0.00 61550001 61600000 113 0.165185 0.1225240
## 19999 0.00 64900001 64950000 186 0.176547 0.1102810
## 20000 0.00 64950001 65000000 240 0.263776 0.1947360
## 20001 0.00 65000001 65050000 277 0.128128 0.0954193
## 20006 0.00 65250001 65300000 194 0.175280 0.1503040
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 903 0.1728570 0.1321760 0.00000000 0.00000000
## 3399 0.1871650 0.1733300 0.00000000 0.00000000
## 3772 0.1356170 0.1306400 0.00000000 0.00000000
## 3773 0.0852238 0.0765575 0.00000000 0.00000000
## 3774 0.1335340 0.1317840 0.00000000 0.00000000
## 3781 0.1177940 0.1036950 0.00646088 0.00000000
## 3788 0.1177940 0.1036950 0.00209059 0.00000000
## 3790 0.1181560 0.1062690 0.00000000 0.00000000
## 3791 0.1087810 0.0889597 0.00000000 0.00000000
## 3792 0.1012870 0.1007000 0.00000000 0.00000000
## 3793 0.1925030 0.1665460 0.00808843 0.00000000
## 3826 0.1379380 0.1391420 0.00000000 0.00899771
## 3941 0.0853294 0.0588546 0.00000000 0.00000000
## 3950 0.0778325 0.0539009 0.00000000 0.00000000
## 6514 0.1120650 0.1021280 0.00000000 0.00000000
## 13217 0.1235350 0.1125110 0.00000000 0.00000000
## 13218 0.1407820 0.1299840 0.00000000 0.00000000
## 13219 0.1494690 0.1414180 0.00000000 0.00000000
## 13220 0.1648260 0.1533640 0.00000000 0.00000000
## 14057 0.0421729 0.0368661 0.00000000 0.00000000
## 14058 0.0708094 0.0544110 0.00000000 0.00000000
## 14243 0.0396591 0.0321878 0.00000000 0.00000000
## 16262 0.1195050 0.1207770 0.00000000 0.00000000
## 16265 0.1788060 0.1780510 0.00000000 0.00000000
## 16266 0.1419130 0.1501990 0.00000000 0.00000000
## 16267 0.1550950 0.1626050 0.00000000 0.00000000
## 16268 0.1722760 0.1675040 0.00000000 0.01049480
## 16269 0.1428110 0.1457580 0.00000000 0.00000000
## 16270 0.1452970 0.1502240 0.00000000 0.00000000
## 16271 0.1555700 0.1517520 0.00498528 0.00645837
## 19081 0.0750444 0.0538881 0.00000000 0.00000000
## 19082 0.0193002 0.0267927 0.00000000 0.00000000
## 19739 0.1588870 0.1264000 0.00694353 0.00453962
## 19800 0.0791149 0.0666468 0.00000000 0.00000000
## 19932 0.0833852 0.0580127 0.00000000 0.00000000
## 19999 0.2507440 0.1871510 0.00420950 0.00244991
## 20000 0.2302480 0.1626550 0.00000000 0.00000000
## 20001 0.1301150 0.1041760 0.00000000 0.00000000
## 20006 0.1216110 0.1062280 0.00000000 0.00000000
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 903 1.58583e-03 2.29027e-03 2.38120e-03 0.322469 1.214250 1.2308100
## 3399 6.12500e-04 8.16000e-04 7.46855e-04 1.160520 2.972800 2.4740600
## 3772 9.01667e-05 2.65000e-05 4.65000e-05 2.432020 -1.878520 -0.5244280
## 3773 8.30000e-05 4.03333e-05 4.91667e-05 1.946800 -0.941941 -0.3438820
## 3774 8.16667e-05 2.76668e-05 4.20002e-05 2.500830 -1.353500 -0.3733890
## 3781 7.41667e-05 2.76667e-05 3.45000e-05 1.940080 -1.536610 -0.6423260
## 3788 7.41667e-05 2.98333e-05 3.45000e-05 1.940080 -1.374610 -0.6423260
## 3790 8.61667e-05 2.65001e-05 3.85002e-05 2.161200 -1.833620 -0.6350750
## 3791 3.55000e-05 1.96667e-05 2.45000e-05 1.478110 -0.576488 0.0507059
## 3792 4.75000e-05 2.45000e-05 3.60003e-05 1.898740 -0.616373 0.2162880
## 3793 6.46667e-05 4.11669e-05 4.38335e-05 1.877030 0.118964 0.2394070
## 3826 1.56500e-04 1.14501e-04 1.17334e-04 2.455710 0.890794 1.0316700
## 3941 4.66669e-05 1.55001e-05 2.20002e-05 1.110000 -1.317040 -0.6062470
## 3950 2.56667e-05 1.18333e-05 1.58333e-05 1.216020 -1.001800 -0.3605060
## 6514 2.03833e-04 3.69672e-04 3.52672e-04 -0.557074 2.328280 2.7553400
## 13217 7.23833e-04 2.57341e-04 3.00170e-04 1.775250 -2.075340 -1.8571700
## 13218 1.04117e-03 4.05516e-04 3.68677e-04 2.007180 -1.910490 -2.1351400
## 13219 1.16983e-03 4.09190e-04 3.88846e-04 2.126910 -2.148800 -2.1798400
## 13220 7.55674e-04 2.66003e-04 2.38502e-04 2.154800 -2.022640 -2.3049900
## 14057 3.81000e-04 3.27002e-04 3.94502e-04 2.124500 1.261110 2.2358600
## 14058 1.20500e-04 9.08347e-05 1.06668e-04 1.303780 1.346540 1.2527000
## 14243 1.05652e-03 8.72538e-04 9.60703e-04 2.022060 1.051440 1.8017000
## 16262 5.05000e-05 1.48335e-04 1.75833e-04 -0.253609 0.363975 0.8790340
## 16265 2.88333e-05 5.33346e-04 4.75333e-04 1.723710 2.056500 1.3396700
## 16266 9.40005e-05 6.90347e-04 6.21012e-04 0.189762 2.226990 1.4170800
## 16267 5.08333e-05 5.92009e-04 5.25841e-04 0.721746 2.161640 1.5104900
## 16268 6.93333e-05 7.88034e-04 6.67837e-04 -1.293670 2.136270 1.3758000
## 16269 1.06667e-04 9.61546e-04 9.50343e-04 -1.925220 1.627650 1.3995400
## 16270 7.71667e-05 6.45525e-04 6.42512e-04 -0.682127 1.393540 1.2760900
## 16271 1.58333e-04 6.64365e-04 7.56515e-04 -0.174387 1.129640 1.4271600
## 19081 1.18901e-03 1.24461e-03 1.22436e-03 0.862748 0.758027 0.7976620
## 19082 1.11667e-03 1.30971e-03 1.30786e-03 2.029490 0.784797 1.0812900
## 19739 3.98671e-04 3.79014e-04 4.32010e-04 1.206870 0.930709 1.0505400
## 19800 2.37000e-03 1.99995e-03 2.30563e-03 1.246390 0.748836 1.2112400
## 19932 6.72170e-04 8.29064e-04 8.34358e-04 0.502783 1.071620 1.1047700
## 19999 8.16667e-04 1.26139e-03 1.36611e-03 -0.243400 0.635922 1.1564500
## 20000 1.24650e-03 1.72167e-03 1.74439e-03 -0.207223 1.104400 1.0378800
## 20001 1.53219e-03 1.96551e-03 2.01766e-03 0.304739 1.123120 1.3799400
## 20006 1.57217e-03 1.30438e-03 1.47139e-03 1.530390 1.099090 1.1772900
## SNP
## 903 903
## 3399 3399
## 3772 3772
## 3773 3773
## 3774 3774
## 3781 3781
## 3788 3788
## 3790 3790
## 3791 3791
## 3792 3792
## 3793 3793
## 3826 3826
## 3941 3941
## 3950 3950
## 6514 6514
## 13217 13217
## 13218 13218
## 13219 13219
## 13220 13220
## 14057 14057
## 14058 14058
## 14243 14243
## 16262 16262
## 16265 16265
## 16266 16266
## 16267 16267
## 16268 16268
## 16269 16269
## 16270 16270
## 16271 16271
## 19081 19081
## 19082 19082
## 19739 19739
## 19800 19800
## 19932 19932
## 19999 19999
## 20000 20000
## 20001 20001
## 20006 20006
length(div.outliers.USUK.lowFstUSAU$SNP)
## [1] 39
Possible parallel “selection” ?
What’s the statistical distribution of these values?
summary(div$WEIGHTED_FST_AUUK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.01132 0.02659 0.03258 0.04437 0.40190
summary(div$WEIGHTED_FST_UKUS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0001839 0.0125022 0.0187334 0.0263484 0.3414900
summary(div$WEIGHTED_FST_USAU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.01724 0.03414 0.04038 0.05435 0.48283
lab.AU <- rep("AU.UK",length(div$WEIGHTED_FST_AUUK))
lab.US <- rep("UK.US",length(div$WEIGHTED_FST_UKUS))
Fst.group <- c(lab.AU,lab.US)
Fst.hist.data <- c(div$WEIGHTED_FST_AUUK,div$WEIGHTED_FST_USUK)
Fst.hist <- data.frame(Fst = Fst.hist.data, population = Fst.group)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Fst.pdf",width=4,height=3)
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
theme_classic() +
geom_density(alpha=0.5,lwd=0.5) +
scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) +
xlab("Fst") + labs(fill="Population") +
geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
dev.off()
## quartz_off_screen
## 2
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
theme_classic() +
geom_density(alpha=0.5,lwd=0.5) +
scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) +
xlab("Fst") + labs(fill="Population") +
geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.AUUK.pdf",height=2,width=2)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
summary(div$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0027682 0.0040090 0.0038645 0.0050565 0.0138998
summary(div$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0028343 0.0041312 0.0039757 0.0051943 0.0141869
summary(div$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.667e-06 2.753e-03 4.016e-03 3.863e-03 5.045e-03 1.409e-02
descdist(div$PI_US)
## summary statistics
## ------
## min: 4.66667e-06 max: 0.0140869
## median: 0.00401555
## mean: 0.003862564
## estimated sd: 0.001783127
## estimated skewness: -0.1258447
## estimated kurtosis: 3.05688
#norm.piUS.fit <- fitdist(div$PI_US,distr="norm")
#summary(norm.piUS.fit)
descdist(div$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0141869
## median: 0.00413121
## mean: 0.003975727
## estimated sd: 0.001837387
## estimated skewness: -0.1188124
## estimated kurtosis: 3.055468
#norm.piUK.fit <- fitdist(div$PI_UK,distr="norm")
#summary(norm.piUK.fit)
descdist(div$PI_AU)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0138998
## median: 0.00400898
## mean: 0.003864496
## estimated sd: 0.001775347
## estimated skewness: -0.1396863
## estimated kurtosis: 3.030743
#norm.piAU.fit <- fitdist(div$PI_AU,distr="norm")
#summary(norm.piAU.fit)
lab.AU <- rep("AU",length(div$PI_AU))
lab.US <- rep("US",length(div$PI_US))
lab.UK <- rep("UK",length(div$PI_UK))
group <- c(lab.AU,lab.US,lab.UK)
pi.hist.data <- c(div$PI_UK,div$PI_US,div$PI_AU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 60213 obs. of 2 variables:
## $ pi : num 0.001053 0.002189 0.002212 0.000915 0.000879 ...
## $ population: Factor w/ 3 levels "AU","UK","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) +
xlab("Pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Pi.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) +
xlab("Pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
Average nucleotide diversity for both invasions is the same (0.003). There are two vertical lines overlaid in the plot above.
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_USvsUK.pdf",width=2,height=2)
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div.outliers.USUK) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_USvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.USUK) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div.outliers.AUUK) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device
## 1
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.AUUK) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device
## 1
descdist(div$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 3.16509
## median: 0.744616
## mean: 0.7115798
## estimated sd: 0.3466121
## estimated skewness: -1.309557
## estimated kurtosis: 13.47233
descdist(div$TajimaD_UK)
## summary statistics
## ------
## min: -2.2276 max: 3.23445
## median: 0.737875
## mean: 0.7152536
## estimated sd: 0.3263563
## estimated skewness: -0.4741872
## estimated kurtosis: 12.17869
descdist(div$TajimaD_AU)
## summary statistics
## ------
## min: -2.33194 max: 3.16824
## median: 0.798065
## mean: 0.7829885
## estimated sd: 0.3372602
## estimated skewness: -0.5499719
## estimated kurtosis: 12.87531
descdist(div.outliers.USUK$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 2.9728
## median: 0.434343
## mean: 0.3082123
## estimated sd: 1.017184
## estimated skewness: -0.2567428
## estimated kurtosis: 2.728269
descdist(div.outliers.AUUK$TajimaD_AU)
## summary statistics
## ------
## min: -2.30499 max: 2.69219
## median: 0.750086
## mean: 0.6478708
## estimated sd: 0.8718095
## estimated skewness: -0.7820734
## estimated kurtosis: 3.98327
What’s the difference in diversity between native and invasive ranges?
div$piUK.piAU <- div$PI_UK - div$PI_AU
div$piUK.piUS <- div$PI_UK - div$PI_US
summary(div$piUK.piAU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.552e-03 -1.935e-05 9.278e-05 1.112e-04 2.319e-04 2.028e-03
qqnorm(div$piUK.piAU, pch = 16)
qqline(div$piUK.piAU, pch = 16)
summary(div$piUK.piUS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.381e-03 -2.535e-06 1.035e-04 1.132e-04 2.277e-04 1.762e-03
qqnorm(div$piUK.piUS, pch = 16)
qqline(div$piUK.piUS, pch = 16)
div.hiUKpi.vsAU <- div[which(div$piUK.piAU > 0),]
div.hiUKpi.vsUS <- div[which(div$piUK.piUS > 0),]
div.hiUKpi.both <- div.hiUKpi.vsAU[which(div$piUK.piUS > 0),]
length(div.hiUKpi.vsAU$piUK.piAU)/length(div$piUK.piAU) # % of windows that have higher pi in native
## [1] 0.7049973
length(div.hiUKpi.vsUS$piUK.piUS)/length(div$piUK.piUS)
## [1] 0.7432116
length(div.hiUKpi.both$piUK.piUS)/length(div$piUK.piUS) # % of windows with higher pi in both invasive ranges
## [1] 0.7432116
If drift acting on novel mutations explains most of the differentiation, then where FST is high, diversity should also be higher in the invasions. Could also be due to weaker purifying selection during population expansion.
div.hifst.AUUK$piUK.piAU <- div.hifst.AUUK$PI_UK - div.hifst.AUUK$PI_AU # UK vs AU in high fst regions only
summary(div.hifst.AUUK$piUK.piAU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.197e-03 -4.330e-05 3.283e-05 1.246e-04 2.897e-04 2.028e-03
div.hifst.AUUK.hiUKpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU > 0),]
length(div.hifst.AUUK.hiUKpi$piUK.piAU)/length(div.hifst.AUUK$piUK.piAU) # % of windows that have higher pi in native
## [1] 0.6224189
div.hifst.UKUS$piUK.piUS <- div.hifst.UKUS$PI_UK - div.hifst.UKUS$PI_US # UK vs US in high fst regions only
summary(div.hifst.UKUS$piUK.piUS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.198e-03 -2.150e-05 2.550e-05 6.589e-05 1.648e-04 1.762e-03
div.hifst.UKUS.hiUKpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS > 0),]
length(div.hifst.UKUS.hiUKpi$piUK.piUS)/length(div.hifst.UKUS$piUK.piUS)
## [1] 0.6989619
div.hiUKpi.both <- div.hifst.UKUS.hiUKpi[which(div.hifst.AUUK.hiUKpi$piUK.piUS > 0),]
length(div.hiUKpi.both$piUK.piUS)
## [1] 0
When comparing ancestral diversity to a bottlenecked invasive population, we’d expect to see lower diversity in the invasive range overall. This difference would be more pronounced in regions that had differentiated from the native range (e.g., where drift and/or selection are more pronounced).
Here, difference in pi > 0 means that diversity is higher in the native range than in the invasive.
lab.AU <- rep("AU",length(div$piUK.piAU))
lab.US <- rep("US",length(div$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div$piUK.piUS,div$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 40142 obs. of 2 variables:
## $ pi : num -1.04e-05 -1.83e-04 2.09e-04 8.63e-05 1.79e-04 ...
## $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("DIfference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
geom_vline(xintercept=0,colour="black",size=0.5) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
geom_vline(xintercept=0,colour="black",size=0.5) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
What about with high-fst (FST > 0.1) windows only?
lab.AU <- rep("AU",length(div.hifst.AUUK$piUK.piAU))
lab.US <- rep("US",length(div.hifst.UKUS$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div.hifst.UKUS$piUK.piUS,div.hifst.AUUK$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 967 obs. of 2 variables:
## $ pi : num 1.88e-04 8.97e-05 1.13e-04 4.36e-04 2.19e-04 ...
## $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
geom_vline(xintercept=0,colour="black",size=0.5) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference_HiFst.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("DIfference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
geom_vline(xintercept=0,colour="black",size=0.5) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
In a given window, if diversity in the invasive range is higher than that of the native range, it is possible that those variants are novel mutations. This filtering will tell us whether we should look at particular genotypes in these regions.
Possibly “novel” diversity (e.g., lower than average native diversity and higher than average invasive)
div.lownatpi <- div[which(div$PI_UK < mean(div$PI_UK)),]
length(div.lownatpi$SNP)
## [1] 9337
unique(div.lownatpi$CHROM)
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00 1.25 1.75
## [13] 1.00 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00
## [25] 4.50 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
length(unique(div.lownatpi$CHROM))
## [1] 33
Number of SNPs have below-average diversity in the native range. Of these SNPs, how many have higher-than average pi in the invasive range (AU or US)?
summary(div.lownatpi$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0015566 0.0026495 0.0023695 0.0033432 0.0052840
div.novelAUpi <- div.lownatpi[which(div.lownatpi$PI_AU > mean(div.lownatpi$PI_AU)),]
unique(div.novelAUpi$CHROM) # which chromosomes
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
length(div.novelAUpi$CHROM) # number of windows
## [1] 5362
length(div.novelAUpi$SNP)/length(div.lownatpi$SNP)
## [1] 0.5742744
% of low native diversity SNPs are higher in AU diversity.
summary(div.lownatpi$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.667e-06 1.541e-03 2.632e-03 2.357e-03 3.334e-03 4.800e-03
div.novelUSpi <- div.lownatpi[which(div.lownatpi$PI_US > mean(div.lownatpi$PI_US)),]
length(div.novelUSpi$SNP)/length(div.lownatpi$SNP)
## [1] 0.5774874
% of low native diversity SNPs are higher in US diversity.
unique(div.novelUSpi$CHROM)
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
length(div.novelUSpi$CHROM)
## [1] 5392
intersect(div.novelAUpi$CHROM,div.novelUSpi$CHROM)
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
length(intersect(div.novelAUpi$CHROM,div.novelUSpi$CHROM))
## [1] 32
#intersect(div.novelAUpi$SNP,div.novelUSpi$SNP)
length(intersect(div.novelAUpi$SNP,div.novelUSpi$SNP))
## [1] 5177
div.outliers.hiAUpi <- div.outliers.AUUK[which(div.outliers.AUUK$PI_AU > div.outliers.AUUK$PI_UK),]
unique(div.outliers.hiAUpi$CHROM)
## [1] 12.00 13.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50 5.00 6.00 0.00
length(div.outliers.hiAUpi$SNP)
## [1] 91
length(div.outliers.AUUK$SNP)
## [1] 201
length(div.outliers.hiAUpi$SNP)/length(div.outliers.AUUK$SNP)
## [1] 0.4527363
85% of FST outlier windows have higher diversity in AU
div.outliers.hiUSpi <- div.outliers.USUK[which(div.outliers.USUK$PI_US > div.outliers.USUK$PI_UK),]
unique(div.outliers.hiUSpi$CHROM)
## [1] 12.00 17.00 1.25 1.00 2.00 3.00 4.00 6.00 0.00
length(div.outliers.hiUSpi$SNP)
## [1] 61
length(div.outliers.USUK$SNP)
## [1] 201
length(div.outliers.hiUSpi$SNP)/length(div.outliers.USUK$SNP)
## [1] 0.3034826
Only 38% of FST outlier windows have higher diversity in US
intersect(div.outliers.hiUSpi$CHROM,div.outliers.hiAUpi$CHROM)
## [1] 12.00 1.25 1.00 2.00 3.00 6.00 0.00
None of these regions overlap between invasions.
These plots are based on code from Gemma Clucas.
chrom2.div <- div[which(div$CHROM==2),]
unique(chrom2.div$SNP)
## [1] 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
## [13] 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898
## [25] 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910
## [37] 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922
## [49] 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934
## [61] 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946
## [73] 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958
## [85] 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970
## [97] 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982
## [109] 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994
## [121] 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006
## [133] 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018
## [145] 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030
## [157] 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042
## [169] 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054
## [181] 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066
## [193] 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078
## [205] 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090
## [217] 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102
## [229] 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114
## [241] 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126
## [253] 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138
## [265] 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150
## [277] 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162
## [289] 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174
## [301] 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186
## [313] 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198
## [325] 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210
## [337] 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222
## [349] 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234
## [361] 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246
## [373] 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258
## [385] 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270
## [397] 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282
## [409] 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294
## [421] 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306
## [433] 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318
## [445] 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330
## [457] 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342
## [469] 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354
## [481] 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366
## [493] 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378
## [505] 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390
## [517] 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402
## [529] 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414
## [541] 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426
## [553] 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438
## [565] 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450
## [577] 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462
## [589] 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474
## [601] 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486
## [613] 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498
## [625] 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510
## [637] 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522
## [649] 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534
## [661] 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546
## [673] 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558
## [685] 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570
## [697] 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582
## [709] 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594
## [721] 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606
## [733] 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618
## [745] 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630
## [757] 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642
## [769] 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654
## [781] 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666
## [793] 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678
## [805] 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690
## [817] 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702
## [829] 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714
## [841] 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726
## [853] 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738
## [865] 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750
## [877] 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762
## [889] 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774
## [901] 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786
## [913] 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798
## [925] 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810
## [937] 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822
## [949] 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834
## [961] 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846
## [973] 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858
## [985] 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870
## [997] 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882
## [1009] 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894
## [1021] 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906
## [1033] 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918
## [1045] 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930
## [1057] 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942
## [1069] 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954
## [1081] 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966
## [1093] 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978
## [1105] 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990
## [1117] 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002
## [1129] 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014
## [1141] 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026
## [1153] 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038
## [1165] 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050
## [1177] 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062
## [1189] 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074
## [1201] 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086
## [1213] 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098
## [1225] 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110
## [1237] 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122
## [1249] 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134
## [1261] 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146
## [1273] 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158
## [1285] 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170
## [1297] 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182
## [1309] 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194
## [1321] 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206
## [1333] 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218
## [1345] 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230
## [1357] 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242
## [1369] 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254
## [1381] 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266
## [1393] 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278
## [1405] 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290
## [1417] 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302
## [1429] 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314
## [1441] 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326
## [1453] 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338
## [1465] 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350
## [1477] 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362
## [1489] 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374
## [1501] 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386
## [1513] 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398
## [1525] 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410
## [1537] 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422
## [1549] 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434
## [1561] 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446
## [1573] 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458
## [1585] 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470
## [1597] 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482
## [1609] 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494
## [1621] 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506
## [1633] 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518
## [1645] 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530
## [1657] 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542
## [1669] 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554
## [1681] 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566
## [1693] 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578
## [1705] 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590
## [1717] 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602
## [1729] 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614
## [1741] 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626
## [1753] 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638
## [1765] 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650
## [1777] 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662
## [1789] 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674
## [1801] 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686
## [1813] 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698
## [1825] 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710
## [1837] 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722
## [1849] 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734
## [1861] 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746
## [1873] 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758
## [1885] 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770
## [1897] 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782
## [1909] 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794
## [1921] 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806
## [1933] 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818
## [1945] 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830
## [1957] 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842
## [1969] 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854
## [1981] 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866
## [1993] 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878
## [2005] 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890
## [2017] 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902
## [2029] 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914
## [2041] 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926
## [2053] 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938
## [2065] 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950
## [2077] 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962
## [2089] 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974
## [2101] 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986
## [2113] 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998
## [2125] 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010
## [2137] 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022
## [2149] 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034
## [2161] 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046
## [2173] 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058
## [2185] 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070
## [2197] 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082
## [2209] 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094
## [2221] 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106
## [2233] 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118
## [2245] 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130
## [2257] 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142
## [2269] 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154
## [2281] 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166
## [2293] 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178
## [2305] 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190
## [2317] 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202
## [2329] 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214
## [2341] 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226
## [2353] 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238
## [2365] 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250
## [2377] 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262
## [2389] 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274
## [2401] 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286
## [2413] 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298
## [2425] 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310
## [2437] 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322
## [2449] 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334
## [2461] 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346
## [2473] 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358
## [2485] 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370
## [2497] 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382
## [2509] 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394
## [2521] 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406
## [2533] 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418
## [2545] 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430
## [2557] 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442
## [2569] 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454
## [2581] 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466
## [2593] 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478
## [2605] 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490
## [2617] 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502
## [2629] 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514
## [2641] 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526
## [2653] 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538
## [2665] 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550
## [2677] 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562
## [2689] 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574
## [2701] 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586
## [2713] 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598
## [2725] 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610
## [2737] 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622
## [2749] 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634
## [2761] 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646
## [2773] 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658
## [2785] 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670
## [2797] 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682
## [2809] 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694
## [2821] 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706
## [2833] 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718
## [2845] 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730
## [2857] 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742
## [2869] 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754
## [2881] 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766
## [2893] 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778
## [2905] 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790
## [2917] 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802
## [2929] 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814
## [2941] 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826
## [2953] 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838
## [2965] 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850
## [2977] 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862
## [2989] 10863 10864 10865 10866 10867 10868 10869 10870 10871
chrom2.div.small <- chrom2.div[which(chrom2.div$SNP < 8980),]
chrom2.div.small <- chrom2.div.small[which(chrom2.div.small$SNP > 8600),]
# 39200001 to 51650000
tail(chrom2.div.small)
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 8974 2 54950001 55000000 888 0.00735755 0.00381462
## 8975 2 55000001 55050000 933 0.00726278 0.00291817
## 8976 2 55050001 55100000 914 0.01519790 0.01316560
## 8977 2 55100001 55150000 953 0.00000000 0.00000000
## 8978 2 55150001 55200000 950 0.02453080 0.01712540
## 8979 2 55200001 55250000 1123 0.04151250 0.03948120
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU PI_UK
## 8974 0.0276795 0.0190044 0.00474647 0.000776248 0.00628362
## 8975 0.0713227 0.0569079 0.02199250 0.012000000 0.00591970
## 8976 0.0273716 0.0252403 0.02200510 0.021276300 0.00620980
## 8977 0.0000000 0.0000000 0.00000000 0.000000000 0.00641686
## 8978 0.0316370 0.0245065 0.00000000 0.000000000 0.00571444
## 8979 0.0553261 0.0473515 0.00000000 0.000000000 0.00691236
## PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU SNP piUK.piAU
## 8974 0.00569739 0.00600235 0.879169 0.756094 0.883948 8974 0.00028127
## 8975 0.00610279 0.00580834 0.666973 0.503085 0.723701 8975 0.00011136
## 8976 0.00523177 0.00562215 0.725060 0.274965 0.841877 8976 0.00058765
## 8977 0.00635051 0.00638269 0.712378 0.737009 0.851269 8977 0.00003417
## 8978 0.00595467 0.00629788 0.408608 0.485645 0.808132 8978 -0.00058344
## 8979 0.00762392 0.00753313 0.871839 0.848962 0.897227 8979 -0.00062077
## piUK.piUS
## 8974 0.00058623
## 8975 -0.00018309
## 8976 0.00097803
## 8977 0.00006635
## 8978 -0.00024023
## 8979 -0.00071156
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1)) # set rows
par(mar=c(0,2,0.5,2)) # set margins for each plot
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.5))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.04))
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4)) # tajima's D axis
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome2.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom6.div <- div[which(div$CHROM==6),]
# runs from 16155 to 16871
# chrom 6 = 716 50kb windows ("SNPs" here)
chrom6.div.small <- chrom6.div[which(chrom6.div$SNP < 16325),]
chrom6.div.small <- chrom6.div.small[which(chrom6.div.small$SNP > 16225),]
head(chrom6.div.small)
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 16226 6 3550001 3600000 570 0.01206770 0.008669440
## 16227 6 3600001 3650000 528 0.03079550 0.023766500
## 16228 6 3650001 3700000 701 0.02597620 0.020965100
## 16229 6 3700001 3750000 805 0.00338468 0.000400509
## 16230 6 3750001 3800000 830 0.02082790 0.017915200
## 16231 6 3800001 3850000 800 0.00589529 0.001372410
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 16226 0.0367649 0.0305788 0.0392762 0.0368076
## 16227 0.0501026 0.0406660 0.0739191 0.0613789
## 16228 0.0711590 0.0593933 0.0454643 0.0368937
## 16229 0.0754966 0.0604820 0.0708642 0.0562380
## 16230 0.0375712 0.0322166 0.0438655 0.0358050
## 16231 0.1001270 0.0672706 0.0743417 0.0579428
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU SNP
## 16226 0.00366513 0.00384209 0.00372947 0.592114 0.972481 0.785959 16226
## 16227 0.00349155 0.00329177 0.00364804 0.682877 0.782069 0.892366 16227
## 16228 0.00468438 0.00458936 0.00478475 0.859767 0.784092 0.972961 16228
## 16229 0.00535166 0.00537373 0.00535428 0.706416 0.819633 0.873451 16229
## 16230 0.00555363 0.00537467 0.00539254 0.840503 0.753410 0.770190 16230
## 16231 0.00531258 0.00519576 0.00516348 0.695610 0.790285 0.812897 16231
## piUK.piAU piUK.piUS
## 16226 -0.00006434 -0.00017696
## 16227 -0.00015649 0.00019978
## 16228 -0.00010037 0.00009502
## 16229 -0.00000262 -0.00002207
## 16230 0.00016109 0.00017896
## 16231 0.00014910 0.00011682
chrom6.div.hifst <- chrom6.div[which(chrom6.div$SNP < 16281),]
chrom6.div.hifst <- chrom6.div.small[which(chrom6.div.small$SNP > 16263),]
# AUUK high fst: window 5350001 to 6300001 on Chrom 6
# "SNP" 16263 to 16281
mean(chrom6.div.hifst$PI_UK)
## [1] 0.002873224
mean(chrom6.div.hifst$PI_US)
## [1] 0.002896161
mean(chrom6.div.hifst$PI_AU)
## [1] 0.00293987
chrom6.div.med <- chrom6.div[which(chrom6.div$SNP < 16450),]
chrom6.div.med <- chrom6.div.med[which(chrom6.div.med$SNP > 16155),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome6.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
quartz(height=3,width=7)
options(scipen=999)
par(new=T)
## Warning in par(new = T): calling par(new=TRUE) with no plot
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.01,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome6.BroaderFSTaroundPeak.pdf", type="pdf")
## quartz_off_screen
## 2
chrom1.div <- div[which(div$CHROM==1),]
chrom1.div.small <- chrom1.div[which(chrom1.div$SNP < 6700),]
chrom1.div.small <- chrom1.div.small[which(chrom1.div.small$SNP > 6400),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome1.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom1A.div <- div[which(div$CHROM==1.25),]
# 2896 to 4342
# 1446 windows, 3 ticks
chrom1A.div.small <- chrom1A.div[which(chrom1A.div$SNP < 4000),]
chrom1A.div.small <- chrom1A.div.small[which(chrom1A.div.small$SNP > 3700),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome1A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom4.div <- div[which(div$CHROM==4),]
# 13506 to 14923,
# 1417 windows, 7 ticks - peak ~500 windows from start
chrom4.div.small <- chrom4.div[which(chrom4.div$SNP < 14150),]
chrom4.div.small <- chrom4.div.small[which(chrom4.div.small$SNP > 13850),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome4.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom4A.div <- div[which(div$CHROM==4.5),]
# 13097 to 13505
# 408 windows, 4 ticks
chrom4A.div.small <- chrom4A.div[which(chrom4A.div$SNP < 13300),]
chrom4A.div.small <- chrom4A.div.small[which(chrom4A.div.small$SNP > 13150),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome4A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2